Initialization Block

In [1]:
#!pip install tensorflow==1.12.0
In [2]:
# Initialization
# --- Run-mode switches (edit by hand before running the notebook) ---
colab_run=False  # True when running on Google Colab (installs pydrive, mounts Drive)
anc_box= True  # True: predict BOX anchor boxes per grid cell; False: one box per cell
_grid_offset=True  # True: targets keep absolute grid coords; False: cell-relative offsets
train=True  # controls whether the training generators are built further below
test_mode=1
data_save=True
# Dataset sub-directories, joined with `root` (set in the configuration cell)
train_dir='VOC/VOC2007/'
val_dir='VOC/VOC2007_test/'
test_dir='VOC/VOC2007_test/'
if colab_run==True:
  # Colab-only setup: authenticate with Google and mount Drive for data access.
  !pip install pydrive

  from pydrive.auth import GoogleAuth
  from pydrive.drive import GoogleDrive
  from google.colab import auth
  from oauth2client.client import GoogleCredentials

  auth.authenticate_user()
  gauth = GoogleAuth()
  gauth.credentials = GoogleCredentials.get_application_default()
  drive = GoogleDrive(gauth)

  import os, cv2
  from google.colab import drive
  drive.mount('/content/drive/')

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Reshape, Activation, Conv2D, Input, MaxPooling2D, BatchNormalization, Flatten, Dense, Lambda
from tensorflow.keras.layers import LeakyReLU
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.optimizers import SGD, Adam, RMSprop
from tensorflow.keras.layers import concatenate
from tensorflow.keras.callbacks import ModelCheckpoint
from matplotlib import pyplot as plt
import tensorflow.keras.backend as K
import tensorflow as tf
import numpy as np
import pickle
import os, cv2

#os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
#os.environ["CUDA_VISIBLE_DEVICES"] = ""

# %matplotlib inline
C:\Users\irfan\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\framework\dtypes.py:523: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
C:\Users\irfan\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\framework\dtypes.py:524: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
C:\Users\irfan\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\framework\dtypes.py:525: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
C:\Users\irfan\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\framework\dtypes.py:526: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
C:\Users\irfan\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\framework\dtypes.py:527: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
C:\Users\irfan\Anaconda3\envs\tf\lib\site-packages\tensorflow\python\framework\dtypes.py:532: FutureWarning: Passing (type, 1) or '1type' as a synonym of type is deprecated; in a future version of numpy, it will be understood as (type, (1,)) / '(1,)type'.
  np_resource = np.dtype([("resource", np.ubyte, 1)])

Configure Block

Model Building Block

In [3]:
# The 20 Pascal-VOC object classes the detector is trained on.
LABELS=['person', 'bird', 'cat', 'cow', 'dog', 'horse', 'sheep','aeroplane', 'bicycle',
         'boat', 'bus', 'car', 'motorbike', 'train', 'bottle', 'chair','diningtable',
         'pottedplant', 'sofa', 'tvmonitor']#+['None']*60
#f=open('coco.txt','r')
#LABELS=[]
#for line in f:
#    line=line.split(':')
#    LABELS.append(str(line[1][1:-1]))
                  
IMAGE_H, IMAGE_W = 416, 416  # network input resolution
GRID_H,  GRID_W  = 13 , 13  # output grid size (416 / 32)
BOX              = 5  # anchor boxes predicted per grid cell
CLASS            = len(LABELS)
CLASS_WEIGHTS    = np.ones(CLASS, dtype='float32')  # per-class loss weights (uniform)
OBJ_THRESHOLD    = 0.3#0.5
NMS_THRESHOLD    = 0.3#0.45
#ANCHORS          = [0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828]
# Anchor prior (w, h) pairs, flattened; presumably in grid-cell units -- verify against Batch_Gen.
ANCHORS          = [1.3221, 1.73145, 3.19275, 4.00944, 5.05587, 8.09892, 9.47112, 4.84053, 11.2364, 10.0071]           
#ANCHORS          = [ 4.469053,2.148582,10.548851,5.381520,11.420664,9.961033,6.517299,3.699693,2.469196,1.599054]
#ANCHORS=[13,13]
#NO_OBJECT_SCALE  = 1.0
#OBJECT_SCALE     = 5.0
#COORD_SCALE      = 1.0
#CLASS_SCALE      = 1.0

BATCH_SIZE       = 10
WARM_UP_BATCHES  = 0
TRUE_BOX_BUFFER  = 50
# Loss term scales used in yolo_loss_1 (coordinate vs. no-object confidence).
lambda_coord=5.0
lambda_noobj=0.5

lr=1e-6
epochs=70
optimizer = Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
#optimizer = SGD(lr=1e-4, decay=0.0005, momentum=0.9)
#optimizer = RMSprop(lr=1e-4, rho=0.9, epsilon=1e-08, decay=0.0)
# Experiment paths: pretrained Darknet weights and the best-checkpoint output.
exp_name='v-1.3/'
root='D:/'
path_wts=root+'yolov2.weights'
path_wts_final=root+exp_name+'best.hdf5'
if not os.path.exists(root+exp_name):
    os.mkdir(root+exp_name)
In [4]:
# the function to implement the organization (reorg) layer (thanks to github.com/allanzelener/YAD2K)
def space_to_depth_x2(x):
    """Rearrange 2x2 spatial blocks into channels (halves H and W, x4 depth).

    Used on the skip connection so its spatial size matches the deep
    feature map before concatenation.
    """
    return tf.space_to_depth(x, block_size=2)

def build_model():
  """Build the YOLOv2 detection network (Darknet-19 backbone).

  Returns a Keras Model mapping a (IMAGE_H, IMAGE_W, 3) image to a
  (GRID_H, GRID_W, BOX, 4+1+CLASS) tensor when `anc_box` is True, or
  (GRID_H, GRID_W, 4+1+CLASS) otherwise.  Layer names conv_1..conv_23 and
  norm_1..norm_22 are preserved so `load_weights` can map pretrained
  Darknet weights onto them by name.
  """
  def conv_block(x, filters, kernel, idx, pool=False):
    # One Darknet unit: bias-free Conv -> BatchNorm -> LeakyReLU(0.1),
    # optionally followed by a 2x2 max-pool.
    x = Conv2D(filters, kernel, strides=(1,1), padding='same',
               name='conv_' + str(idx), use_bias=False)(x)
    x = BatchNormalization(name='norm_' + str(idx))(x)
    x = LeakyReLU(alpha=0.1)(x)
    if pool:
      x = MaxPooling2D(pool_size=(2, 2))(x)
    return x

  input_image = Input(shape=(IMAGE_H, IMAGE_W, 3))
  # true_boxes is unused: the true-box "hack" below was disabled (see the
  # commented lines near the end); kept to document the original interface.
  true_boxes  = Input(shape=(1, 1, 1, TRUE_BOX_BUFFER , 4))

  # Layers 1-13: feature extractor down to 1/16 resolution.
  x = conv_block(input_image, 32, (3,3), 1, pool=True)
  x = conv_block(x,  64, (3,3), 2, pool=True)
  x = conv_block(x, 128, (3,3), 3)
  x = conv_block(x,  64, (1,1), 4)
  x = conv_block(x, 128, (3,3), 5, pool=True)
  x = conv_block(x, 256, (3,3), 6)
  x = conv_block(x, 128, (1,1), 7)
  x = conv_block(x, 256, (3,3), 8, pool=True)
  x = conv_block(x, 512, (3,3), 9)
  x = conv_block(x, 256, (1,1), 10)
  x = conv_block(x, 512, (3,3), 11)
  x = conv_block(x, 256, (1,1), 12)
  x = conv_block(x, 512, (3,3), 13)

  # Higher-resolution features saved for the passthrough (skip) connection.
  skip_connection = x

  x = MaxPooling2D(pool_size=(2, 2))(x)

  # Layers 14-20: deep 1/32-resolution features.
  x = conv_block(x, 1024, (3,3), 14)
  x = conv_block(x,  512, (1,1), 15)
  x = conv_block(x, 1024, (3,3), 16)
  x = conv_block(x,  512, (1,1), 17)
  x = conv_block(x, 1024, (3,3), 18)
  x = conv_block(x, 1024, (3,3), 19)
  x = conv_block(x, 1024, (3,3), 20)

  # Layer 21: compress skip features, then space-to-depth so their spatial
  # size matches the deep feature map before concatenation.
  skip_connection = conv_block(skip_connection, 64, (1,1), 21)
  skip_connection = Lambda(space_to_depth_x2)(skip_connection)

  x = concatenate([skip_connection, x])

  # Layer 22
  x = conv_block(x, 1024, (3,3), 22)

  if anc_box==True:
    # Layer 23: linear 1x1 conv producing BOX box predictions per cell.
    x = Conv2D(BOX*(4 + 1 + CLASS), (1,1), strides=(1,1), padding='same', name='conv_23')(x)
    output = Reshape((GRID_H, GRID_W,BOX,4 + 1 + CLASS))(x)
  else :
    # Layer 23: a single box prediction per cell (no anchors).
    x = Conv2D((4 + 1 + CLASS), (1,1), strides=(1,1), padding='same', name='conv_23')(x)
    output = Reshape((GRID_H, GRID_W,4 + 1 + CLASS))(x)

  # small hack to allow true_boxes to be registered when Keras build the model 
  # for more information: https://github.com/fchollet/keras/issues/2790
  #output = Lambda(lambda args: args[0])([output, true_boxes])#Change :Hasib

  #model = Model([input_image, true_boxes], output)#Change :Hasib
  model = Model(input_image, output)
  return model
  #model.load_weights('/content/drive/My Drive/Data/yolo_net_ep500_act.h5')


class WeightReader:
    """Sequential cursor over a Darknet .weights file read as float32 values.

    The cursor starts (and resets) at index 4, skipping the first four
    float32 slots of the file.
    """

    def __init__(self, weight_file):
        self.offset = 4
        self.all_weights = np.fromfile(weight_file, dtype='float32')

    def read_bytes(self, size):
        """Return the next `size` float32 values and advance the cursor."""
        start = self.offset
        self.offset = start + size
        return self.all_weights[start:self.offset]

    def reset(self):
        """Rewind the cursor to just past the skipped header values."""
        self.offset = 4

def load_weights(model,path_wst):
  """Load pretrained Darknet weights into `model` by layer name, in place.

  Parameters
  ----------
  model : Keras model with layers named conv_1..conv_23 / norm_1..norm_22
          (as produced by build_model).
  path_wst : path to the binary .weights file.

  Returns the same (mutated) model.
  """
  # BUG FIX: this argument was previously ignored in favour of the global
  # `path_wts`; use the path the caller actually passed.
  weight_reader = WeightReader(path_wst)
  weight_reader.reset()
  nb_conv = 23

  for i in range(1, nb_conv+1):
      conv_layer = model.get_layer('conv_' + str(i))

      if i < nb_conv:
          # All but the last conv are followed by batch norm; the file stores
          # beta, gamma, mean, variance (in that order) before the kernel.
          norm_layer = model.get_layer('norm_' + str(i))

          size = np.prod(norm_layer.get_weights()[0].shape)

          beta  = weight_reader.read_bytes(size)
          gamma = weight_reader.read_bytes(size)
          mean  = weight_reader.read_bytes(size)
          var   = weight_reader.read_bytes(size)

          # Keras BatchNormalization expects [gamma, beta, mean, var].
          norm_layer.set_weights([gamma, beta, mean, var])

      if len(conv_layer.get_weights()) > 1:
          # Conv with a bias (the final detection layer): bias comes first.
          bias   = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[1].shape))
          kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
          # Darknet stores kernels transposed relative to Keras; reshape to
          # the reversed Keras shape then reorder axes (YAD2K convention).
          kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
          kernel = kernel.transpose([2,3,1,0])
          conv_layer.set_weights([kernel, bias])
      else:
          # Bias-free conv (followed by batch norm): kernel only.
          kernel = weight_reader.read_bytes(np.prod(conv_layer.get_weights()[0].shape))
          kernel = kernel.reshape(list(reversed(conv_layer.get_weights()[0].shape)))
          kernel = kernel.transpose([2,3,1,0])
          conv_layer.set_weights([kernel])
  return model



def yolo_loss_1(y_true, y_pred):
    """YOLOv2-style training loss (summed over the whole batch).

    Terms:
      * (x, y) coordinate error on object cells, scaled by lambda_coord
      * sqrt-scale (w, h) error on object cells, scaled by lambda_coord
      * confidence error on object cells (target = IoU with ground truth)
      * confidence error on empty cells, scaled by lambda_noobj
      * softmax cross-entropy class error, weighted by CLASS_WEIGHTS and
        normalized by the number of object cells

    The last axis of y_true / y_pred is [x, y, w, h, objectness, one-hot
    classes]; decoding is steered by the module-level `anc_box` /
    `_grid_offset` flags and the ANCHORS / GRID_* / BATCH_SIZE constants.
    """
    # Object / no-object masks derived from the objectness channel.
    obj_mask_ex = tf.expand_dims(y_true[..., 4], axis=-1)  # broadcastable over coord pairs
    obj_mask = y_true[..., 4]
    noobj_mask = tf.abs(y_true[..., 4] - 1)  # 1 where no object (tf.abs; was np.abs on a tensor)

    if anc_box == True and _grid_offset == True:
        # Decode predictions to absolute grid coordinates using per-cell
        # offsets and anchor priors: xy = sigmoid(t) + cell, wh = exp(t) * anchor.
        _x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
        _y = tf.transpose(_x, (0, 2, 1, 3, 4))
        _grid = tf.tile(tf.concat([_x, _y], -1), [BATCH_SIZE, 1, 1, 5, 1])  # (batch, 13, 13, 5, 2)

        pred_xy = tf.sigmoid(y_pred[..., :2]) + _grid
        pred_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1, 1, 1, BOX, 2])

    if anc_box == False and _grid_offset == True:
        # Single box per cell: same xy decoding, raw wh.
        _x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1)))
        _y = tf.transpose(_x, (0, 2, 1, 3))
        _grid = tf.tile(tf.concat([_x, _y], -1), [BATCH_SIZE, 1, 1, 1])

        pred_xy = tf.sigmoid(y_pred[..., :2]) + _grid
        pred_wh = y_pred[..., 2:4]

    if _grid_offset == False:
        # Targets are already cell-relative (Batch_Gen subtracts the cell index).
        pred_xy = y_pred[..., 0:2]
        pred_wh = y_pred[..., 2:4]

    true_xy = y_true[..., 0:2]
    true_wh = y_true[..., 2:4]
    pred_conf = y_pred[..., 4]

    # IoU between true and predicted boxes (centre/size -> corner form).
    true_wh_half = true_wh / 2.
    true_mins = tf.subtract(true_xy, true_wh_half)
    true_maxes = tf.add(true_xy, true_wh_half)

    pred_wh_half = pred_wh / 2.
    pred_mins = tf.subtract(pred_xy, pred_wh_half)
    pred_maxes = tf.add(pred_xy, pred_wh_half)

    intersect_mins = tf.maximum(pred_mins, true_mins)
    intersect_maxes = tf.minimum(pred_maxes, true_maxes)
    intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.)
    intersect_areas = tf.multiply(intersect_wh[..., 0], intersect_wh[..., 1])

    true_areas = tf.multiply(true_wh[..., 0], true_wh[..., 1])
    pred_areas = tf.multiply(pred_wh[..., 0], pred_wh[..., 1])

    union_areas = tf.subtract(tf.add(pred_areas, true_areas), intersect_areas)
    # NOTE(review): +1 on both numerator and denominator smooths the ratio
    # (and avoids 0/0 on empty cells) but biases IoU towards 1 -- confirm
    # this is intentional.
    intersect_areas = tf.add(intersect_areas, 1)
    union_areas = tf.add(union_areas, 1)
    iou_scores = tf.truediv(intersect_areas, union_areas)

    true_box_class = tf.argmax(y_true[..., 5:], -1)
    # (a dead assignment that set pred_box_class to the confidence channel
    # and was immediately overwritten has been removed)
    pred_box_class = y_pred[..., 5:]
    class_mask = y_true[..., 4] * tf.gather(CLASS_WEIGHTS, true_box_class)

    #class_mask = y_true[..., 4] * tf.to_float(true_box_class)
    nb_class_box = tf.reduce_sum(tf.to_float(class_mask > 0.0))
    true_conf = tf.multiply(iou_scores, y_true[..., 4])  # confidence target = IoU on object cells

    # Coordinate (x, y) loss on object cells.
    loss_bb = tf.subtract(true_xy, pred_xy)
    loss_bb = tf.square(loss_bb)
    loss_bb = tf.multiply(loss_bb, obj_mask_ex)
    loss_bb = tf.reduce_sum(loss_bb)

    # Width/height loss on sqrt scale; sign(wh) * sqrt(|wh|) keeps the sqrt
    # defined for negative predicted sizes (|wh|==0 is remapped to 1 so the
    # sign division is safe; the product is still 0 there).
    pred_wh_abs = tf.abs(pred_wh)
    pred_wh_abs = tf.where(tf.equal(pred_wh_abs, 0), tf.ones_like(pred_wh_abs), pred_wh_abs)
    pred_wh_sign = tf.truediv(pred_wh, pred_wh_abs)
    loss_wh = tf.subtract(tf.sqrt(true_wh), tf.multiply(pred_wh_sign, tf.sqrt(pred_wh_abs)))
    loss_wh = tf.square(loss_wh)
    loss_wh = tf.multiply(loss_wh, obj_mask_ex)
    loss_wh = tf.reduce_sum(loss_wh)

    # Confidence loss on object cells.
    loss_conf = tf.subtract(true_conf, pred_conf)
    loss_conf = tf.square(loss_conf)
    loss_conf = tf.multiply(loss_conf, obj_mask)
    loss_conf = tf.reduce_sum(loss_conf)

    # Confidence loss on empty cells (down-weighted by lambda_noobj below).
    loss_noobj_conf = tf.subtract(true_conf, pred_conf)
    loss_noobj_conf = tf.square(loss_noobj_conf)
    loss_noobj_conf = tf.multiply(loss_noobj_conf, noobj_mask)
    loss_noobj_conf = tf.reduce_sum(loss_noobj_conf)

    # Classification loss, averaged over object cells.
    loss_class = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class)
    loss_class = tf.reduce_sum(loss_class * class_mask) / (nb_class_box + 1e-6)

    loss = lambda_coord * loss_bb + lambda_coord * loss_wh + loss_conf + lambda_noobj * loss_noobj_conf + loss_class
    return loss

Data Ready Block

In [5]:
# Data lives on Google Drive when running in Colab, in the working directory otherwise.
if colab_run==True :data_dir='/content/drive/My Drive/CNN_Basic/'
else: data_dir=''
from xml.etree import ElementTree as ET

def read_content(xml_file,_dir):
    """Parse one Pascal-VOC annotation XML file.

    Returns (filename, list_with_all_boxes, out), where `out` is a dict
    holding the image path (under _dir/JPEGImages/), the original image
    height/width, and the objects with box coordinates rescaled to the
    IMAGE_W x IMAGE_H network input size.
    NOTE(review): `list_with_all_boxes` is never populated and is always
    returned empty; callers presumably use only `out` -- confirm.
    """
    objs=[]
    tree = ET.parse(xml_file)
    root = tree.getroot()

    list_with_all_boxes = []
    filename = root.find('filename').text
    size=root.find('size')
    img_h=int(size.find('height').text)
    img_w=int(size.find('width').text)
    for boxes in root.iter('object'):
        
        name = boxes.find('name').text
        # NOTE(review): if an <object> carries no <bndbox> these stay None
        # and the scaling below raises TypeError -- assumed not to occur in VOC.
        ymin, xmin, ymax, xmax = None, None, None, None
        
        for box in boxes.findall("bndbox"):
            ymin = int(box.find("ymin").text)
            xmin = int(box.find("xmin").text)
            ymax = int(box.find("ymax").text)
            xmax = int(box.find("xmax").text)
        # Scale factors: original image size -> network input size.
        wf=IMAGE_W/img_w
        hf=IMAGE_H/img_h
        obj={'name':name,
          'xmin':xmin*wf,
          'ymin':ymin*hf,
          'xmax':xmax*wf,
          'ymax':ymax*hf}
        objs.append(obj)
        
    out={
          'filename':_dir+'JPEGImages/'+filename,
          'height':img_h,
          'width':img_w,
          'object':objs
    }
        
    return filename, list_with_all_boxes,out

def IOU(bboxes1, bboxes2):
    """Intersection-over-union of two corner-format boxes [xmin, ymin, xmax, ymax]."""
    ax1, ay1, ax2, ay2 = list(bboxes1)
    bx1, by1, bx2, by2 = list(bboxes2)
    # Overlap rectangle, clamped to zero width/height when the boxes are disjoint.
    inter_w = np.maximum(np.minimum(ax2, bx2) - np.maximum(ax1, bx1), 0)
    inter_h = np.maximum(np.minimum(ay2, by2) - np.maximum(ay1, by1), 0)
    inter_area = inter_w * inter_h
    area_a = (ax2 - ax1) * (ay2 - ay1)
    area_b = (bx2 - bx1) * (by2 - by1)
    return inter_area / (area_a + area_b - inter_area)
    
def Batch_Gen(all_data, no_of_batch, _dir):
    """Infinite generator yielding (x_batch, y_batch) training pairs.

    Parameters
    ----------
    all_data    : list of sample dicts as produced by read_content
    no_of_batch : number of batches the data is split into per pass
    _dir        : dataset sub-directory, joined with the global `root`

    Yields
    ------
    x_batch : (n, IMAGE_H, IMAGE_W, 3) float32 RGB images scaled to [0, 1]
    y_batch : (n, GRID_H, GRID_W[, BOX], 4+1+len(LABELS)) float64 targets;
              each assigned cell holds [x, y, w, h, 1, one-hot class] with
              coordinates in grid-cell units.
    """
    while (True):
        N = len(all_data)
        _batch_size = N // no_of_batch

        for _ind in range(no_of_batch):
            batch = all_data[_ind * _batch_size:(_ind + 1) * _batch_size]
            n = len(batch)
            x_batch = np.zeros((n, IMAGE_H, IMAGE_W, 3), dtype=np.float32)
            # FIX: np.float was a deprecated alias of the builtin float
            # (removed in NumPy 1.20+); builtin float gives the same dtype.
            if anc_box == True:
                y_batch = np.zeros((n, GRID_H, GRID_W, BOX, 4 + 1 + len(LABELS)), dtype=float)
            else:
                y_batch = np.zeros((n, GRID_H, GRID_W, 4 + 1 + len(LABELS)), dtype=float)
            instance_count = 0

            for sample in batch:
                image_name = root + _dir + sample['filename']
                img = cv2.imread(image_name)
                img = cv2.resize(img, (IMAGE_H, IMAGE_W))
                img = img[:, :, ::-1]  # BGR -> RGB
                all_objs = sample['object']
                # (removed unused locals img_w/img_h, which also had
                # height/width swapped, and the unused true_box_index)

                # Anchor priors as [0, 0, w, h] boxes for shape-only IoU matching.
                anchors = [[0, 0, ANCHORS[2 * i], ANCHORS[2 * i + 1]] for i in range(int(len(ANCHORS) // 2))]
                for obj in all_objs:
                    # Pixels per grid cell along each axis.
                    no_gridx = float(IMAGE_W / GRID_W)
                    no_gridy = float(IMAGE_H / GRID_H)
                    if obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin'] and obj['name'] in LABELS:
                        # Box centre in grid-cell units.
                        center_x = .5 * (obj['xmin'] + obj['xmax'])
                        center_x = center_x / no_gridx
                        center_y = .5 * (obj['ymin'] + obj['ymax'])
                        center_y = center_y / no_gridy
                        grid_x = int(np.floor(center_x))
                        grid_y = int(np.floor(center_y))
                        if _grid_offset == False:
                            # Make the centre relative to its own grid cell.
                            center_x -= grid_x
                            center_y -= grid_y
                        if grid_x < GRID_W and grid_y < GRID_H:
                            obj_indx = LABELS.index(obj['name'])
                            center_w = (obj['xmax'] - obj['xmin']) / no_gridx  # grid-cell units
                            center_h = (obj['ymax'] - obj['ymin']) / no_gridy  # grid-cell units

                            box = [center_x, center_y, center_w, center_h]

                            # Pick the anchor whose shape best matches this box.
                            best_anchor = -1
                            max_iou = -1
                            shifted_box = [0, 0, center_w, center_h]
                            for i in range(len(anchors)):
                                iou = IOU(shifted_box, anchors[i])
                                if max_iou < iou:
                                    best_anchor = i
                                    max_iou = iou

                            # Write ground truth x, y, w, h, confidence and class.
                            if anc_box == True:
                                y_batch[instance_count, grid_y, grid_x, best_anchor, 0:4] = box
                                y_batch[instance_count, grid_y, grid_x, best_anchor, 4] = 1.
                                y_batch[instance_count, grid_y, grid_x, best_anchor, 5 + obj_indx] = 1
                            else:
                                y_batch[instance_count, grid_y, grid_x, 0:4] = box
                                y_batch[instance_count, grid_y, grid_x, 4] = 1.
                                y_batch[instance_count, grid_y, grid_x, 5 + obj_indx] = 1

                # Assign the normalized image and advance to the next slot.
                x_batch[instance_count] = img / 255
                instance_count += 1

            yield (x_batch, y_batch)

Reading Data

In [6]:
import glob
import cv2
import pandas as pd
import pickle

# Load the pre-pickled VOC annotation lists (built offline from read_content).
# NOTE: pickle.load can execute arbitrary code -- only open trusted files.
f=open(root+'VOC/train_voc_dfs','rb')
train_data=pickle.load(f)
f.close()
# Validation and test splits both come from the same VOC2007 test pickle.
f=open(root+'VOC/test_voc_dfs','rb')
valid_data=pickle.load(f)
f.close()
f=open(root+'VOC/test_voc_dfs','rb')
test_data=pickle.load(f)
f.close()
In [7]:
import numpy as np



# Build the training / validation batch generators.
# FIX: the original guard `if train==True or train==False:` was a tautology
# (always true), so the body now runs unconditionally; dead `train_batch=[]`
# / `valid_batch=[]` placeholder inits were removed.
#train_imgs=all_data[:n_train]
valid_data = test_data[:1500]  # cap the validation split at 1500 samples
N = len(train_data)
n_train = len(train_data)
n_valid = len(valid_data)
no_of_tr_batch = int(np.floor(n_train / BATCH_SIZE))
train_batch = Batch_Gen(train_data, no_of_tr_batch, train_dir)
no_of_val_batch = int(np.floor(n_valid / BATCH_SIZE))
valid_batch = Batch_Gen(valid_data, no_of_val_batch, val_dir)
In [8]:
# Count per-class object frequency over the training set, then derive
# inverse-frequency class weights (rarer classes get larger weight,
# normalized so the maximum weight is 1).  The unused `name=[]` was removed.
class_wt = pd.Series([0] * len(LABELS), index=LABELS)
for sample in train_data:
    for obj in sample['object']:
        class_wt[obj['name']] += 1
class_weights = class_wt.sum() - class_wt
class_weights = class_weights / class_weights.max()
#CLASS_WEIGHTS=np.array(class_weights.values,dtype=np.float32)

# Visual sanity check: draw ground-truth boxes for test samples 30..59.
# BUG FIX: the original cell was collapsed onto one line and the `*`
# operators were lost (e.g. `int(xminimage_w)`); restored as `xmin * image_w`.
for i in range(30, 60):
    image_h = (test_data[i]['height'] / 416)
    image_w = (test_data[i]['width'] / 416)
    img = cv2.imread(test_data[i]['filename'])
    for obj in test_data[i]['object']:
        # assumes obj's values after 'name' are (xmin, ymin, xmax, ymax) — matches usage below
        xmin, ymin, xmax, ymax = list(obj.values())[1:]
        cv2.rectangle(img,
                      (int(xmin * image_w), int(ymin * image_h)),
                      (int(xmax * image_w), int(ymax * image_h)),
                      (0, 255, 0), 3)
    plt.imshow(img)
    plt.show()

# Quick check of the height scale factor for the last sample `i` from the
# previous loop; the bare expression is the cell's displayed output.
# (Original cell was collapsed onto a single line; reformatted.)
image_h = (test_data[i]['height'] / 416)
test_data[i]['height'] / 416

# Sanity-check one training batch: show an input image, one class channel of
# its label tensor, and the x/y/w/h/conf channels of one anchor slot.
# (Original cell was collapsed onto a single line; reformatted.)
data_check = True
if data_check == True:
    sampleX, sampleY = next(train_batch)
    plt.imshow(sampleX[3])
    plt.show()
    img = sampleY[3][..., 3, 11 + 5]
    plt.imshow(img)
    plt.show()
    for i in range(4, 5):
        print('index', i)
        plt.imshow(sampleY[3][..., 3, i] * 10)
        plt.show()

Train Block

In [10]:
# Build the detector network and print its layer summary.
model=build_model()
model.summary()
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
==================================================================================================
input_1 (InputLayer)            (None, 416, 416, 3)  0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 416, 416, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
norm_1 (BatchNormalization)     (None, 416, 416, 32) 128         conv_1[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu (LeakyReLU)         (None, 416, 416, 32) 0           norm_1[0][0]                     
__________________________________________________________________________________________________
max_pooling2d (MaxPooling2D)    (None, 208, 208, 32) 0           leaky_re_lu[0][0]                
__________________________________________________________________________________________________
conv_2 (Conv2D)                 (None, 208, 208, 64) 18432       max_pooling2d[0][0]              
__________________________________________________________________________________________________
norm_2 (BatchNormalization)     (None, 208, 208, 64) 256         conv_2[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_1 (LeakyReLU)       (None, 208, 208, 64) 0           norm_2[0][0]                     
__________________________________________________________________________________________________
max_pooling2d_1 (MaxPooling2D)  (None, 104, 104, 64) 0           leaky_re_lu_1[0][0]              
__________________________________________________________________________________________________
conv_3 (Conv2D)                 (None, 104, 104, 128 73728       max_pooling2d_1[0][0]            
__________________________________________________________________________________________________
norm_3 (BatchNormalization)     (None, 104, 104, 128 512         conv_3[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_2 (LeakyReLU)       (None, 104, 104, 128 0           norm_3[0][0]                     
__________________________________________________________________________________________________
conv_4 (Conv2D)                 (None, 104, 104, 64) 8192        leaky_re_lu_2[0][0]              
__________________________________________________________________________________________________
norm_4 (BatchNormalization)     (None, 104, 104, 64) 256         conv_4[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_3 (LeakyReLU)       (None, 104, 104, 64) 0           norm_4[0][0]                     
__________________________________________________________________________________________________
conv_5 (Conv2D)                 (None, 104, 104, 128 73728       leaky_re_lu_3[0][0]              
__________________________________________________________________________________________________
norm_5 (BatchNormalization)     (None, 104, 104, 128 512         conv_5[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_4 (LeakyReLU)       (None, 104, 104, 128 0           norm_5[0][0]                     
__________________________________________________________________________________________________
max_pooling2d_2 (MaxPooling2D)  (None, 52, 52, 128)  0           leaky_re_lu_4[0][0]              
__________________________________________________________________________________________________
conv_6 (Conv2D)                 (None, 52, 52, 256)  294912      max_pooling2d_2[0][0]            
__________________________________________________________________________________________________
norm_6 (BatchNormalization)     (None, 52, 52, 256)  1024        conv_6[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_5 (LeakyReLU)       (None, 52, 52, 256)  0           norm_6[0][0]                     
__________________________________________________________________________________________________
conv_7 (Conv2D)                 (None, 52, 52, 128)  32768       leaky_re_lu_5[0][0]              
__________________________________________________________________________________________________
norm_7 (BatchNormalization)     (None, 52, 52, 128)  512         conv_7[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_6 (LeakyReLU)       (None, 52, 52, 128)  0           norm_7[0][0]                     
__________________________________________________________________________________________________
conv_8 (Conv2D)                 (None, 52, 52, 256)  294912      leaky_re_lu_6[0][0]              
__________________________________________________________________________________________________
norm_8 (BatchNormalization)     (None, 52, 52, 256)  1024        conv_8[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_7 (LeakyReLU)       (None, 52, 52, 256)  0           norm_8[0][0]                     
__________________________________________________________________________________________________
max_pooling2d_3 (MaxPooling2D)  (None, 26, 26, 256)  0           leaky_re_lu_7[0][0]              
__________________________________________________________________________________________________
conv_9 (Conv2D)                 (None, 26, 26, 512)  1179648     max_pooling2d_3[0][0]            
__________________________________________________________________________________________________
norm_9 (BatchNormalization)     (None, 26, 26, 512)  2048        conv_9[0][0]                     
__________________________________________________________________________________________________
leaky_re_lu_8 (LeakyReLU)       (None, 26, 26, 512)  0           norm_9[0][0]                     
__________________________________________________________________________________________________
conv_10 (Conv2D)                (None, 26, 26, 256)  131072      leaky_re_lu_8[0][0]              
__________________________________________________________________________________________________
norm_10 (BatchNormalization)    (None, 26, 26, 256)  1024        conv_10[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_9 (LeakyReLU)       (None, 26, 26, 256)  0           norm_10[0][0]                    
__________________________________________________________________________________________________
conv_11 (Conv2D)                (None, 26, 26, 512)  1179648     leaky_re_lu_9[0][0]              
__________________________________________________________________________________________________
norm_11 (BatchNormalization)    (None, 26, 26, 512)  2048        conv_11[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_10 (LeakyReLU)      (None, 26, 26, 512)  0           norm_11[0][0]                    
__________________________________________________________________________________________________
conv_12 (Conv2D)                (None, 26, 26, 256)  131072      leaky_re_lu_10[0][0]             
__________________________________________________________________________________________________
norm_12 (BatchNormalization)    (None, 26, 26, 256)  1024        conv_12[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_11 (LeakyReLU)      (None, 26, 26, 256)  0           norm_12[0][0]                    
__________________________________________________________________________________________________
conv_13 (Conv2D)                (None, 26, 26, 512)  1179648     leaky_re_lu_11[0][0]             
__________________________________________________________________________________________________
norm_13 (BatchNormalization)    (None, 26, 26, 512)  2048        conv_13[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_12 (LeakyReLU)      (None, 26, 26, 512)  0           norm_13[0][0]                    
__________________________________________________________________________________________________
max_pooling2d_4 (MaxPooling2D)  (None, 13, 13, 512)  0           leaky_re_lu_12[0][0]             
__________________________________________________________________________________________________
conv_14 (Conv2D)                (None, 13, 13, 1024) 4718592     max_pooling2d_4[0][0]            
__________________________________________________________________________________________________
norm_14 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_14[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_13 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_14[0][0]                    
__________________________________________________________________________________________________
conv_15 (Conv2D)                (None, 13, 13, 512)  524288      leaky_re_lu_13[0][0]             
__________________________________________________________________________________________________
norm_15 (BatchNormalization)    (None, 13, 13, 512)  2048        conv_15[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_14 (LeakyReLU)      (None, 13, 13, 512)  0           norm_15[0][0]                    
__________________________________________________________________________________________________
conv_16 (Conv2D)                (None, 13, 13, 1024) 4718592     leaky_re_lu_14[0][0]             
__________________________________________________________________________________________________
norm_16 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_16[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_15 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_16[0][0]                    
__________________________________________________________________________________________________
conv_17 (Conv2D)                (None, 13, 13, 512)  524288      leaky_re_lu_15[0][0]             
__________________________________________________________________________________________________
norm_17 (BatchNormalization)    (None, 13, 13, 512)  2048        conv_17[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_16 (LeakyReLU)      (None, 13, 13, 512)  0           norm_17[0][0]                    
__________________________________________________________________________________________________
conv_18 (Conv2D)                (None, 13, 13, 1024) 4718592     leaky_re_lu_16[0][0]             
__________________________________________________________________________________________________
norm_18 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_18[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_17 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_18[0][0]                    
__________________________________________________________________________________________________
conv_19 (Conv2D)                (None, 13, 13, 1024) 9437184     leaky_re_lu_17[0][0]             
__________________________________________________________________________________________________
norm_19 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_19[0][0]                    
__________________________________________________________________________________________________
conv_21 (Conv2D)                (None, 26, 26, 64)   32768       leaky_re_lu_12[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_18 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_19[0][0]                    
__________________________________________________________________________________________________
norm_21 (BatchNormalization)    (None, 26, 26, 64)   256         conv_21[0][0]                    
__________________________________________________________________________________________________
conv_20 (Conv2D)                (None, 13, 13, 1024) 9437184     leaky_re_lu_18[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_20 (LeakyReLU)      (None, 26, 26, 64)   0           norm_21[0][0]                    
__________________________________________________________________________________________________
norm_20 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_20[0][0]                    
__________________________________________________________________________________________________
lambda (Lambda)                 (None, 13, 13, 256)  0           leaky_re_lu_20[0][0]             
__________________________________________________________________________________________________
leaky_re_lu_19 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_20[0][0]                    
__________________________________________________________________________________________________
concatenate (Concatenate)       (None, 13, 13, 1280) 0           lambda[0][0]                     
                                                                 leaky_re_lu_19[0][0]             
__________________________________________________________________________________________________
conv_22 (Conv2D)                (None, 13, 13, 1024) 11796480    concatenate[0][0]                
__________________________________________________________________________________________________
norm_22 (BatchNormalization)    (None, 13, 13, 1024) 4096        conv_22[0][0]                    
__________________________________________________________________________________________________
leaky_re_lu_21 (LeakyReLU)      (None, 13, 13, 1024) 0           norm_22[0][0]                    
__________________________________________________________________________________________________
conv_23 (Conv2D)                (None, 13, 13, 125)  128125      leaky_re_lu_21[0][0]             
__________________________________________________________________________________________________
reshape (Reshape)               (None, 13, 13, 5, 25 0           conv_23[0][0]                    
==================================================================================================
Total params: 50,676,061
Trainable params: 50,655,389
Non-trainable params: 20,672
__________________________________________________________________________________________________
In [11]:
# Inspect the last 31 layers — these are the ones left trainable during
# fine-tuning (the training cell freezes model.layers[:-31]).
for layer in model.layers[-31:]:
        print(layer)
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DE3CABA8>
<tensorflow.python.keras.layers.normalization.BatchNormalization object at 0x000001D2DE47DA90>
<tensorflow.python.keras.layers.advanced_activations.LeakyReLU object at 0x000001D2DE449B70>
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DE4A4518>
<tensorflow.python.keras.layers.normalization.BatchNormalization object at 0x000001D2DE4A4D30>
<tensorflow.python.keras.layers.advanced_activations.LeakyReLU object at 0x000001D2DE523E48>
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DE579978>
<tensorflow.python.keras.layers.normalization.BatchNormalization object at 0x000001D2DE592EB8>
<tensorflow.python.keras.layers.advanced_activations.LeakyReLU object at 0x000001D2DE62C860>
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DE660B38>
<tensorflow.python.keras.layers.normalization.BatchNormalization object at 0x000001D2DE6FDBA8>
<tensorflow.python.keras.layers.advanced_activations.LeakyReLU object at 0x000001D2DE654978>
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DE763A58>
<tensorflow.python.keras.layers.normalization.BatchNormalization object at 0x000001D2DE72B2E8>
<tensorflow.python.keras.layers.advanced_activations.LeakyReLU object at 0x000001D2DE72B5F8>
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DE840E10>
<tensorflow.python.keras.layers.normalization.BatchNormalization object at 0x000001D2DE8085F8>
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DE9CBA90>
<tensorflow.python.keras.layers.advanced_activations.LeakyReLU object at 0x000001D2DE808438>
<tensorflow.python.keras.layers.normalization.BatchNormalization object at 0x000001D2DEA63B38>
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DE8C16D8>
<tensorflow.python.keras.layers.advanced_activations.LeakyReLU object at 0x000001D2DE9C05C0>
<tensorflow.python.keras.layers.normalization.BatchNormalization object at 0x000001D2DE967860>
<tensorflow.python.keras.layers.core.Lambda object at 0x000001D2DEA98320>
<tensorflow.python.keras.layers.advanced_activations.LeakyReLU object at 0x000001D2DE8E2898>
<tensorflow.python.keras.layers.merge.Concatenate object at 0x000001D2DEA98E48>
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DEAEC5F8>
<tensorflow.python.keras.layers.normalization.BatchNormalization object at 0x000001D2DEB49EF0>
<tensorflow.python.keras.layers.advanced_activations.LeakyReLU object at 0x000001D2DEB49BA8>
<tensorflow.python.keras.layers.convolutional.Conv2D object at 0x000001D2DEB7FC88>
<tensorflow.python.keras.layers.core.Reshape object at 0x000001D2DEC13CF8>
In [12]:
def train_model(model):
    """Compile `model` with the YOLO loss and train it with checkpointing.

    Best weights (lowest val_loss) are saved to root + exp_name + "best.hdf5".
    Returns (trained model, Keras History).
    """
    filepath = root + exp_name + "best.hdf5"
    checkpoint = ModelCheckpoint(filepath, monitor='val_loss', verbose=1,
                                 save_best_only=True, mode='min')
    callbacks_list = [checkpoint]
    # BUG FIX: the original compiled twice — unconditionally, then again with
    # identical arguments when anc_box was True. One compile is sufficient.
    model.compile(loss=yolo_loss_1, optimizer=optimizer, metrics=['accuracy'])
    history = model.fit_generator(train_batch,
                                  steps_per_epoch=no_of_tr_batch,
                                  epochs=epochs,
                                  callbacks=callbacks_list,
                                  validation_data=valid_batch,
                                  validation_steps=no_of_val_batch)
    return model, history
In [13]:
if train==True:
    model=load_weights(model,path_wts)
    # Freeze everything except the last 31 layers (fine-tune the head only).
    for layer in model.layers[:-31]:
        layer.trainable=False
    model,history=train_model(model)

    # Training curves: accuracy and loss on the same axes.
    plt.plot(history.history['acc'])
    plt.plot(history.history['loss'])
    plt.title('model accuracy')
    plt.ylabel('accuracy')
    plt.xlabel('epoch')
    # BUG FIX: legend previously read ['train', 'test'] although the two
    # curves are training accuracy and training loss.
    plt.legend(['train_acc', 'train_loss'], loc='upper left')
    plt.show()

    # summarize history for loss (skip the first 5 noisy epochs)
    plt.plot(range(5,epochs),history.history['loss'][5:])
    plt.plot(range(5,epochs),history.history['val_loss'][5:])
    plt.title('model loss')
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train_loss', 'validation_loss'], loc='upper left')
    plt.savefig(root+exp_name+'yolo-v1_exp.png')
    plt.show()

    # BUG FIX: the original filename contained a ':' ('history:_ep_70_...'),
    # which raises OSError on Windows — the environment warnings in this
    # notebook show it runs on Windows.
    with open(root+exp_name+'history_ep_70_v-1.0_exp','wb') as f:
        pickle.dump([history.history['loss'],history.history['val_loss']],f)
    val_loss=np.array(history.history['val_loss'])
    best_epoch=np.argmin(val_loss)
    print(best_epoch,val_loss[best_epoch])
# Load evaluation weights: mode 1 = final weights, mode 2 = pretrained base.
if test_mode==1:
    model.load_weights(path_wts_final)
if test_mode==2:
    model=load_weights(model,path_wts)

Prediction Block

In [15]:
def _softmax(x, axis=-1, t=-100.):
    x = x - np.max(x)
    
    if np.min(x) < t:
        x = x/np.min(x)*t
        
    e_x = np.exp(x)
    
    return e_x / e_x.sum(axis, keepdims=True)

def _sigmoid(x):
    return 1. / (1. + np.exp(-x))

def nmax_supp(boxes):
    """Greedy non-max suppression over predicted boxes.

    `boxes`: iterable of (x, y, w, h, conf, class) tuples. Boxes are sorted
    by descending confidence; any lower-confidence box of the same class
    with IOU >= 0.1 against an earlier box gets its confidence zeroed.
    Returns the surviving rows (conf > 0) as a list of numpy arrays.
    """
    df = pd.DataFrame(boxes, columns=['x', 'y', 'w', 'h', 'conf', '_class'])
    sdf = df.sort_values('conf', ascending=False).reset_index(drop=True)
    for ind1 in range(len(sdf) - 1):
        box1 = sdf.loc[ind1, ['x', 'y', 'w', 'h']].values
        b1_class = sdf.loc[ind1, ['_class']].values
        for ind2 in range(ind1 + 1, len(sdf)):
            b2_class = sdf.loc[ind2, ['_class']].values
            b2_conf = sdf.loc[ind2, ['conf']].values
            # (The original re-tested b1_class == b2_class in a nested `if`;
            # it is already guaranteed here, so the duplicate check is gone.)
            if b2_class == b1_class and b2_conf > 0.0:
                box2 = sdf.loc[ind2, ['x', 'y', 'w', 'h']].values
                if IOU(box1, box2) >= 0.1:
                    sdf.loc[ind2, 'conf'] = 0.0
    ndf = sdf[sdf.conf > 0.0]
    return list(ndf.values)

def decode_netout_anc(netout, anchors, nb_class, obj_threshold=0.3, nms_threshold=0.3):
    """Decode an anchor-box network output grid into NMS-filtered boxes.

    netout: array of shape (grid_h, grid_w, BOX, 4 + 1 + nb_class); mutated
    in place (objectness sigmoided, class scores thresholded).
    Returns the NMS-filtered list of (xmin, ymin, xmax, ymax, conf, class)
    rows in image-relative [0, 1] coordinates.
    `nms_threshold` is accepted for interface compatibility but unused here
    (nmax_supp applies its own fixed 0.1 IOU cut).
    """
    grid_h, grid_w = netout.shape[:2]
    boxes = []

    # objectness -> sigmoid; class scores -> conditional probs, thresholded
    netout[..., 4]  = _sigmoid(netout[..., 4])
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
    netout[..., 5:] *= netout[..., 5:] > obj_threshold

    for row in range(grid_h):
        for col in range(grid_w):
            for b in range(BOX):
                classes = netout[row, col, b, 5:]
                confidence = netout[row, col, b, 4]
                if np.sum(classes) > 0:
                    x, y, w, h = netout[row, col, b, :4]
                    # box centre in image-relative units
                    x = (col + _sigmoid(x)) / grid_w
                    y = (row + _sigmoid(y)) / grid_h
                    # NOTE(review): uses the global ANCHORS rather than the
                    # `anchors` argument — confirm this is intentional.
                    w = ANCHORS[2 * b + 0] * np.exp(w) / grid_w
                    h = ANCHORS[2 * b + 1] * np.exp(h) / grid_h
                    class_ind = np.argmax(classes)
                    # BUG FIX: the original built this tuple twice in a row.
                    box = (x - w / 2, y - h / 2, x + w / 2, y + h / 2,
                           confidence, class_ind)
                    # keep only boxes fully inside the unit square
                    if abs(box[0]) <= 1 and abs(box[1]) <= 1 and box[2] <= 1 and box[3] <= 1:
                        if box[0] >= 0 and box[1] >= 0 and box[2] >= 0 and box[3] >= 0:
                            boxes.append(box)

    f_boxes = nmax_supp(boxes)
    return f_boxes
    
def decode_netout_1(netout, anchors, nb_class, obj_threshold=0.3, nms_threshold=0.3):
    """Decode a single-box (no-anchor) output grid into NMS-filtered boxes.

    netout: assumed shape (grid_h, grid_w, 4 + 1 + nb_class) — TODO confirm;
    it is mutated in place (objectness sigmoided, class scores thresholded).
    Returns NMS-filtered (xmin, ymin, xmax, ymax, conf, class_ind) rows in
    image-relative [0, 1] coordinates. `anchors` and `nms_threshold` are
    unused in this variant.
    """
    #grid_h, grid_w, nb_box = netout.shape[:3]
    grid_h, grid_w = netout.shape[:2]
    boxes = []
    
    # decode the output by the network
    netout[..., 4]  = _sigmoid(netout[..., 4])
    netout[..., 5:] = netout[..., 4][..., np.newaxis] * _softmax(netout[..., 5:])
    netout[..., 5:] *= netout[..., 5:] > obj_threshold
    # tracks the best class probability seen per class (only last write kept)
    class_prob_log=[0]*CLASS
    #boxes=[(0,)*6]*CLASS
    boxes=[]
    for row in range(grid_h):
        for col in range(grid_w):
            #for b in range(BOX):
                # from 4th element onwards are confidence and class classes
                classes = netout[row,col,5:]
                #classes = netout[row,col,5:]
                confidence = netout[row,col,4]
                if np.sum(classes) > 0:
                    # first 4 elements are x, y, w, and h
                    #x, y, w, h = netout[row,col,b,:4]
                    x, y, w, h = netout[row,col,:4]
                    #print(col,_sigmoid(x-col),row,_sigmoid(y),w,h)
                    # cell-size-to-image ratios; equal when IMAGE_W == IMAGE_H
                    w_ratio=(IMAGE_W/GRID_W)/IMAGE_W
                    h_ratio=(IMAGE_W/GRID_W)/IMAGE_H
                    # NOTE(review): h_ratio is applied to x and w_ratio to y —
                    # this looks swapped, but the two ratios are identical for
                    # square inputs (416x416), so behavior is unaffected here.
                    # Confirm before using non-square images.
                    x = (col+_sigmoid(x))*(h_ratio)#x*(32/416) # center position, unit: image width
                    y = (row+_sigmoid(y))*(w_ratio)#y*(32/416) # center position, unit: image height
                    w = w*(w_ratio) # unit: image width
                    h = h*(h_ratio) # unit: image height
                    #print(x,y,w,h)
                    class_ind=np.argmax(classes)
                    #if class_prob_log[class_ind]< classes[class_ind]:       
                    class_prob_log[class_ind]=classes[class_ind]
                    # corner-form box; keep only boxes inside the unit square
                    box = (x-w/2, y-h/2, x+w/2, y+h/2, confidence, class_ind)
                    if abs(box[0])<=1 and abs(box[1])<=1 and box[2]<=1 and box[3]<=1 :
                        if (box[0])>=0 and (box[1])>=0 and box[2]>=0 and box[3]>=0 :
                                       boxes.append(box)
                                       #boxes[class_ind]=box

                      
    
    f_boxes=nmax_supp(boxes)
    return f_boxes#[:5]   

def draw_boxes_1(image, boxes, labels, t_lbl=None):
    """Draw predicted boxes (green) with class-name labels onto `image`.

    image: HxWxC array (modified in place and also returned).
    boxes: iterable of (xmin, ymin, xmax, ymax, conf, class_idx) with
           coordinates in image-relative [0, 1] units.
    labels: class-index -> class-name sequence.
    t_lbl: accepted for interface compatibility but currently unused
           (ground-truth drawing was commented out in the original).
    """
    image_h, image_w, _ = image.shape

    for box in boxes:
        # scale relative coordinates to pixels
        xmin = int(box[0] * image_w)
        ymin = int(box[1] * image_h)
        xmax = int(box[2] * image_w)
        ymax = int(box[3] * image_h)
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 3)
        cv2.putText(image,
                    labels[int(box[5])] + ' ' + str(box[4]),
                    (xmin, ymin - 13),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    0.0015 * 400,
                    (0, 255, 0), 2)

    return image

"""# Perform detection on image"""
error=0
count=0
import pandas as pd
from copy import copy, deepcopy
from tqdm import tqdm,trange
result_df=pd.DataFrame({},columns=['class','iou','correct','True','img','boxx1','boxy1','boxx2','boxy2'])
ind_loc=0
for i in range(0,len(test_data)):
  count+=1
  sample=test_data[i]
  image = cv2.imread(root+val_dir+sample['filename'])
  dummy_array = np.zeros((1,1,1,1,TRUE_BOX_BUFFER,4))

  

  input_image = cv2.resize(image, (IMAGE_W, IMAGE_H))
  input_image = input_image / 255.
  input_image = np.expand_dims(input_image, 0)
  
  netout = model.predict(input_image)
  

  
  if anc_box==True:
    boxes = decode_netout_anc(netout[0], 
                        obj_threshold=.30,
                        nms_threshold=.90,
                        anchors=ANCHORS, 
                        nb_class=CLASS)
  else:
    boxes = decode_netout_1(netout[0], 
                        obj_threshold=0.30,
                        nms_threshold=0.90,
                        anchors=ANCHORS, 
                        nb_class=CLASS)
   
  true_lbls=sample['object']

  true_lb={}
  pred_lb={}
  for lbl in LABELS:
      true_lb[lbl]=[]
      pred_lb[lbl]=[]
    
  for lbl in true_lbls:
          [name,xmin,ymin,xmax,ymax]=list(lbl.values())
          true_lb[name].append([(xmin/IMAGE_W),(ymin/IMAGE_H), (xmax/IMAGE_W),(ymax/IMAGE_H)])#[cx,cy,iw,ih]
            
  for box in boxes:
          [xmin,ymin,xmax,ymax,conf,name]=box
          pred_lb[LABELS[int(name)]].append([(xmin),(ymin), (xmax),(ymax)])#[cx,cy,iw,ih]
  _ious=[]
  
  for _class in LABELS:
        if len(true_lb[_class])>0 and len(pred_lb[_class])>0:
            temp=deepcopy(pred_lb[_class])
            #_max_iou=0
            for true_box in true_lb[_class]:
                _max_iou=0
                _box=[0,0,0,0]
                for _ind,pred_box in enumerate(temp):
                    _iou= IOU(true_box,pred_box)
                    if _iou>=_max_iou:
                            temp_ind=_ind
                            _max_iou=_iou
                            _box=pred_box
                
                #if len(temp)>0:del temp[temp_ind]
                _ious.append([_class,_max_iou]) 
                result_df.loc[ind_loc,['class','iou','True','img','boxx1','boxy1','boxx2','boxy2']]=[_class,_max_iou,1.0,i,_box[0],_box[1],_box[2],_box[3]]
            if _max_iou>=0.4:
                    result_df.loc[ind_loc,'correct']=1
            else :
                    result_df.loc[ind_loc,'correct']=0
            ind_loc+=1
            #result_df.loc[ind_loc:ind_loc+len(temp),'correct']=0
            #result_df.loc[ind_loc:ind_loc+len(temp),'class']=_class
            #result_df.loc[ind_loc:ind_loc+len(temp),'True']=0
            #ind_loc+=len(temp)
          
        elif len(true_lb[_class])==0 and len(pred_lb[_class])>0:
            for _box in pred_lb[_class]:
                result_df.loc[ind_loc,'correct']=0
                result_df.loc[ind_loc,['class','iou','True','img','boxx1','boxy1','boxx2','boxy2']]=[_class,0.0,0.0,i,_box[0],_box[1],_box[2],_box[3]]
                ind_loc+=1
        elif len(true_lb[_class])>0 and len(pred_lb[_class])==0:
            for _box in true_lb[_class]:
                result_df.loc[ind_loc,'correct']=0
                result_df.loc[ind_loc,['class','iou','True','img','boxx1','boxy1','boxx2','boxy2']]=[_class,0.0,1.0,i,_box[0],_box[1],_box[2],_box[3]]
                ind_loc+=1
    
  image = draw_boxes_1(image, boxes, LABELS,true_lb)
  #plt.figure(figsize=(10,10))
  #print('row',i)
  if i<100:
      _image=cv2.resize(image,(200,200))
      if i%3==0 :fig,axes=plt.subplots(nrows=1,ncols=3,figsize=(20,5))  
      axes[i%3].imshow(_image[:,:,::-1])
      axes[i%3].set_title(str(i))#+','+str(_ious))
      if i%3==3-1 :plt.show()
  
In [16]:
result_df.head(30)
Out[16]:
class iou correct True img boxx1 boxy1 boxx2 boxy2
0 person 0 0 1 0 5.44781e-05 5.76923e-05 0.00239704 0.00239423
1 dog 0 0 1 0 0.000326869 0.00115385 0.0013279 0.00178365
2 train 0 0 1 1 0.000997417 0.000961538 0.00148536 0.00144712
3 chair 0 0 1 2 0.00114904 0.001 0.00147596 0.0013141
4 sofa 0 0 1 2 0.000591346 0.00099359 0.00103365 0.00125
5 car 0 0 1 3 0.268814 0.795905 0.368429 0.872661
6 chair 0 0 0 3 0.820061 0.506166 0.91068 0.67307
7 chair 0 0 1 4 0.271157 0.52293 0.348824 0.77184
8 diningtable 0 0 1 4 0.000740385 0.00133974 0.00177404 0.00240385
9 pottedplant 0 0 1 4 0.000899038 0.000865385 0.00135577 0.00155128
10 person 0 0 0 5 0.363047 0.0447975 0.745999 0.657207
11 person 0 0 0 5 0.0118355 0.6252 0.0707624 0.993128
12 chair 0 0 1 5 0.154326 0.543236 0.254162 0.75701
13 person 0 0 1 6 0.468308 0.211093 0.622391 0.314778
14 horse 0 0 1 6 0.000590776 0.000485777 0.00175196 0.00213842
15 person 0 0 0 7 0.297347 0.191488 0.597548 0.675161
16 cat 0 0 1 7 0.000605769 0.000378383 0.00158654 0.00228514
17 cow 0 0 1 8 0.0014375 0.00102564 0.00214423 0.00161538
18 person 0 0 1 9 0.647713 0.0298006 0.712577 0.202384
19 bus 0 0 1 9 0.000346154 0.00117666 0.00145192 0.00164588
20 car 0 0 1 9 0.000889423 0.00140044 0.00240385 0.00228113
21 car 0 0 1 9 0.002 0.00129938 0.00240385 0.00160256
22 car 0 0 1 9 0.000783654 0.0014221 0.00128365 0.00176138
23 chair 0 0 0 9 0.481481 0.771564 0.810229 0.965858
24 bicycle 0 0 1 10 0.000370192 0.000871795 0.00173077 0.00229487
25 dog 0 0 1 11 0.000196103 0.000253036 0.00226468 0.00235324
26 person 0 0 1 12 0.395016 0.168758 0.509324 0.632857
27 horse 0 0 1 12 0.000326923 0.000745772 0.00176923 0.00204906
28 person 0 0 1 13 0.0187527 0.241412 0.0750474 0.640956
29 cow 0 0 1 13 9.61538e-06 0.000538462 0.000283654 0.00158974
In [17]:
def get_average_precision(res_class):
    """Compute average precision (area under the precision/recall curve) for one class.

    Parameters
    ----------
    res_class : pandas.DataFrame
        Detections for a single class with a 0..n-1 RangeIndex (callers do
        ``.reset_index(drop=True)``). Must contain columns 'correct'
        (1 = detection matched a ground-truth box) and 'True' (ground-truth
        presence flags; their sum is the total number of true boxes).

    Returns
    -------
    (float, pandas.DataFrame)
        The AP, and the same frame mutated in place with added 'precision',
        'recall', 'auc' and 'new' columns.
    """
    total_true = res_class['True'].sum()
    # Running true-positive count at each rank. Vectorized cumsum replaces the
    # original per-row .loc loop (same values, no slow row-wise writes into
    # int-initialized columns).
    cum_correct = res_class['correct'].eq(1).cumsum()
    ranks = np.arange(1, len(res_class) + 1)
    res_class['precision'] = cum_correct / ranks
    # Guard: with no ground-truth boxes the original divided by zero.
    res_class['recall'] = cum_correct / total_true if total_true else 0.0
    # A negative precision diff marks the right edge of each PR "step";
    # subtracting 1 gives the row just before the drop.
    res_class['auc'] = res_class.precision.diff()
    indexes = res_class[res_class['auc'] < 0.0].index.values - 1
    res_class['new'] = res_class.recall.diff()
    res_class['new'] = res_class['new'] * res_class.precision
    # Riemann sum over consecutive step edges: precision at the right edge of
    # the step times the recall gained across the step.
    AP = 0.0
    for start, end in zip(indexes[:-1], indexes[1:]):
        step_precision = res_class.loc[end, 'precision']
        delta_recall = abs(res_class.loc[end, 'recall'] - res_class.loc[start, 'recall'])
        AP += step_precision * delta_recall
    return AP, res_class
#res_class=res_grp.get_group('person').reset_index(drop=True)
#AP,log_df=get_average_precision(res_class)
#print(AP)
In [18]:
# Support count per class and the per-class groups of detection rows.
counts = result_df['class'].value_counts()
res_grp = result_df.groupby('class')

log_dfs = {}
APS = {}
weighted_sum = 0
for label in LABELS:
    class_rows = res_grp.get_group(label).reset_index(drop=True)
    ap, annotated = get_average_precision(class_rows)
    weighted_sum += ap * counts[label]
    print(label, ap, counts[label])
    log_dfs[label] = annotated
    APS[label] = ap
# mAP here is the support-weighted mean of the per-class APs.
mAP = weighted_sum / counts.sum()
APS['mAP'] = mAP
print('mAP:', mAP)
person 0 3863
bird 0 576
cat 0 370
cow 0 329
dog 0 530
horse 0 395
sheep 0 311
aeroplane 0 311
bicycle 0 389
boat 0 393
bus 0 255
car 0 1176
motorbike 0 369
train 0 303
bottle 0 649
chair 0 1940
diningtable 0 299
pottedplant 0 608
sofa 0 398
tvmonitor 0 361
mAP: 0.0
In [ ]:
import pickle
# Persist the per-class AP dict next to the experiment's other artifacts.
with open(root + exp_name + 'mAP', 'wb') as f:
    pickle.dump(APS, f)
# One-row frame transposed to one-AP-per-row, then rendered as a bar chart.
AP_df = pd.DataFrame(APS, index=[0]).transpose()
AP_df.plot.bar()
In [ ]:
# Plot and save one precision/recall curve per class.
for key in log_dfs.keys():
    log_df = log_dfs[key]
    plt.plot(log_df.recall, log_df.precision)
    plt.title(key + ' recall vs precision')
    # Include the class name in the filename: the original used a fixed
    # 'prec_recall.png', so every iteration overwrote the previous class's
    # figure and only the last curve survived.
    plt.savefig(root + exp_name + key + '_prec_recall.png')
    plt.show()
In [ ]:
person 0.23582652758625772 488
bird 0.332090913347574 79
cat 0.18148535796562115 70
cow 0.06029040404040405 25
dog 0.1516882331595831 106
horse 0.4364105863449367 60
sheep 0.1462189126662811 29
aeroplane 0.4483907957122242 42
bicycle 0.2560289590314319 61
boat 0.08323100111647512 72
bus 0.5177437023222662 34
car 0.33919941600292774 180
motorbike 0.1333783255675688 58
train 0.39176817652218116 48
bottle 0.06894994994194856 77
chair 0.11418658570179668 166
diningtable 0.10972335190273155 68
pottedplant 0.025058248997451477 118
sofa 0.11977448238452026 92
tvmonitor 0.2633423105658475 78
mAP: 0.21215717095920034
In [ ]:
# Pull the 'person' detections and compute the fraction of ground-truth
# person boxes that were matched by a correct detection (recall at full rank).
res_class = res_grp.get_group('person').reset_index(drop=True)
res_class['correct'].sum() / res_class['True'].sum()
In [ ]:
# First 30 detection rows for the 'person' class.
res_class.head(30)
In [ ]:
#model.save(root+exp_name+'70.h5')

Extra Debug

In [ ]:
# --- Debug: build the YOLO cell-offset grid and materialize it ---
# NOTE(review): TF1-style code; tf.to_float and tf.Session were removed in TF2.
GRID_W=7
GRID_H=7
BATCH_SIZE=10
# cell_x: (1, GRID_H, GRID_W, 1, 1) tensor whose value at (., row, col, ., .) is col.
cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(GRID_W), [GRID_H]), (1, GRID_H, GRID_W, 1, 1)))
# Swapping the H and W axes turns the column-index grid into the row-index grid.
cell_y = tf.transpose(cell_x, (0,2,1,3,4))
# Per-cell (x, y) offsets, tiled across the batch and the 5 anchor boxes.
cell_grid = tf.tile(tf.concat([cell_x,cell_y], -1), [BATCH_SIZE, 1, 1, 5, 1])

#pred_xy = tf.sigmoid(y_pred[..., :2]) + cell_grid
#pred_wh = tf.exp(y_pred[..., 2:4]) * np.reshape(ANCHORS, [1,1,1,BOX,2])
sess=tf.Session()
x,y,grid=sess.run([cell_x,cell_y,cell_grid])
In [ ]:
# Sanity-check shapes: x (1,7,7,1,1), y (1,7,7,1,1), grid (10,7,7,5,2).
x.shape,y.shape,grid.shape
In [ ]:
# numpy equivalent of the tf.concat above; z should be (1, 7, 7, 1, 2).
z=np.concatenate([x,y],-1)
z.shape
In [ ]:
# Check that expand_dims on the objectness channel (index 4) adds a trailing
# axis: y_true[..., 4] is (7, 7), ob_mask should be (7, 7, 1).
y_true=np.ones((7,7,10))
ob_mask= tf.expand_dims(y_true[..., 4], axis=-1)
sess=tf.Session()  # TF1-only API
_mask=sess.run(ob_mask)
_mask.shape,y_true[..., 4].shape
In [ ]:
# Build a fake (7, 7, 15) label tensor and recover the per-cell class id.
import numpy as np
import tensorflow as tf
label = np.ones((7, 7, 15))
label[..., 14] = 5  # make the last class channel dominate everywhere
# argmax over the class channels (10:) -> class index 4 at every cell
true_box = label[..., 10:].argmax(axis=-1)
In [ ]:
# tf.gather look-up of per-class weights by predicted class index:
# indices 3..6 are up-weighted to 9; `true_box` (from the previous cell)
# selects one weight per grid cell.
wts=np.ones((10))
wts[3:7]=9
res=tf.gather(wts,true_box)
sess=tf.Session()  # TF1-only API
ret=sess.run(res)
ret
In [ ]:
# Compare the gathered-weight shape against slices of the label tensor.
ret.shape,label[...,4:].shape,label[...,5:].shape
In [ ]:
# tf.where demo: X is all zeros, so tf.equal(X, 1) is all False and every
# element falls through to the tf.ones_like branch -> Z is all ones.
import tensorflow as tf
X=tf.zeros((3,3))
Y=tf.equal(X,1)
A=tf.where(Y,X,tf.ones_like(X))
sess=tf.Session()  # TF1-only API
Z=sess.run(A)
Z 
In [ ]:
import numpy as np
def IOU(bboxes1, bboxes2):
    """Intersection-over-union of two axis-aligned boxes.

    Each box is an (xmin, ymin, xmax, ymax) sequence; numpy arrays broadcast
    elementwise. Returns the IoU in [0, 1]; returns 0.0 when the union area
    is zero (both boxes degenerate), where the original divided by zero.
    """
    x1_min, y1_min, x1_max, y1_max = list(bboxes1)
    x2_min, y2_min, x2_max, y2_max = list(bboxes2)
    # Intersection rectangle; width/height clamp to 0 when boxes don't overlap.
    inter_w = np.maximum(np.minimum(x1_max, x2_max) - np.maximum(x1_min, x2_min), 0)
    inter_h = np.maximum(np.minimum(y1_max, y2_max) - np.maximum(y1_min, y2_min), 0)
    interArea = inter_w * inter_h
    boxAArea = (x1_max - x1_min) * (y1_max - y1_min)
    boxBArea = (x2_max - x2_min) * (y2_max - y2_min)
    union = boxAArea + boxBArea - interArea
    # Degenerate scalar case: zero union -> report 0 instead of dividing by 0.
    if np.ndim(union) == 0 and union == 0:
        return 0.0
    return interArea / union
IOU([0,0,10,10],[0,0,20,20])

Extra functions

In [ ]:
import csv

def read_data(dataset):
  f=open(dataset+'/label.csv')
  file=csv.reader(f,delimiter=',')
  data=[]
  i=0
  sc=416.0
  for line in file:
      dt=line
      H=dt[0]
      W=dt[1]
      xmin=(float(dt[2]))/sc
      ymin=(float(dt[3]))/sc
      xmax=(float(dt[4]))/sc
      ymax=(float(dt[5]))/sc

      output={
          'filename':dataset+'/images/'+str(i)+'.jpg',
          'height':H,
          'width':W,
          'object':[{'name':'None',
          'xmin':xmin*IMAGE_W,
          'ymin':ymin*IMAGE_H,
          'xmax':xmax*IMAGE_W,
          'ymax':ymax*IMAGE_H}]
          }

      data.append(output)
      i=i+1
  return data